import pandas as pd  
import numpy as np  
import seaborn as sns  
import matplotlib.pyplot as plt  
from sklearn.preprocessing import LabelEncoder  
from sklearn.linear_model import LogisticRegression  
from sklearn.model_selection import train_test_split  
from sklearn.metrics import classification_report, confusion_matrix  
import pickle
from decision_company import read_csv_file, encoder_instance, encode_column, choose_data_types, get_columns, corr, create_figure, show_plots, set_plot_title, save_plot, remove_labels, divide_dataset, create_LR_instance, classifier_training, classifier_predictions, create_heatmap

# End-to-end script: load the credit-customer data, encode its object-typed
# columns, plot the correlation matrix, fit a logistic-regression classifier
# on the 'class' column and persist the evaluation artifacts under
# ref_result/.
# NOTE(review): the decision_company helpers are defined outside this file;
# comments about what they do are inferred from their names and call sites --
# confirm against that module.

# Load the dataset
credit_customers = read_csv_file("credit_customers.csv")

# Encode categorical variables: every column selected by the 'object' dtype
# filter is replaced in place by its encoded version.
# NOTE(review): one encoder instance is shared by all columns; presumably it
# is re-fitted on each call, so it cannot be used afterwards to invert the
# encoding of earlier columns -- verify if decoding is ever needed.
le = encoder_instance()
categorical_columns = get_columns(choose_data_types(credit_customers, ['object']))
for column in categorical_columns:
    credit_customers[column] = encode_column(le, credit_customers[column])

# Create a correlation matrix and render it as an annotated heatmap.
# The figure is saved before show_plots() is called.
corr_matrix = corr(credit_customers)
create_figure((12, 10))
create_heatmap(corr_matrix, annot=True, cmap='coolwarm')
set_plot_title("Correlation Matrix")
save_plot('ref_result/Correlation_Matrix.png')
show_plots()

# Prepare data for logistic regression: 'class' is the target, everything
# else returned by remove_labels is used as features.
X = remove_labels(credit_customers, 'class')
y = credit_customers['class']

# Split data into training and testing sets (30% held out, fixed seed so the
# split is reproducible between runs).
X_train, X_test, y_train, y_test = divide_dataset(X, y, test_size=0.3, random_state=42)

# Fit logistic regression model
log_reg = create_LR_instance()
log_reg = classifier_training(log_reg, X_train, y_train)

# Predict on test set
y_pred = classifier_predictions(log_reg, X_test)

# Evaluate the model. Each metric is computed once and the same object is
# both printed and pickled, so the console output and the stored artifact
# cannot diverge. Files are written through context managers so the handles
# are flushed and closed deterministically.
report = classification_report(y_test, y_pred)
print(report)
with open("./ref_result/classification_report.pkl", "wb") as report_file:
    pickle.dump(report, report_file)

conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)
with open("./ref_result/confusion_matrix.pkl", "wb") as matrix_file:
    pickle.dump(conf_matrix, matrix_file)

# Display feature importances: the first row of the fitted model's coef_
# array, indexed by feature name and sorted in descending order.
# NOTE(review): the sort is on the signed coefficient, not its magnitude, so
# features with large negative weights end up at the bottom of the table.
feature_importances = pd.DataFrame(
    log_reg.coef_[0],
    index=X.columns,
    columns=['importance'],
).sort_values('importance', ascending=False)
print("\nFeature Importances:")
print(feature_importances)
with open("./ref_result/feature_importances.pkl", "wb") as importances_file:
    pickle.dump(feature_importances, importances_file)


